Chapter 6 Diversity analyses

rm(list=ls()) #clear environment
load("data/squirrels_data.Rdata")
singlem <- read.csv("data/singlem.csv",sep=";",header=T)
options(contrasts = c('contr.sum','contr.poly'))

6.1 Data preparation

#Change genome names column to row names
genome_counts <- genome_counts %>%
  column_to_rownames(var="genome")
genome_gifts <- genome_gifts %>%
  column_to_rownames(var="genome")

#Get list of present MAGs
present_MAGs <- genome_counts %>%
  filter(rowSums(.[, -1]) != 0) %>%
  rownames()

#Remove samples with all zeros (no data after filtering)
genome_counts_filt <- genome_counts %>%
  select_if(~!all(. == 0))

#Align distillr annotations with present MAGs and remove all-zero and all-one traits
present_MAGs <- present_MAGs[present_MAGs %in% rownames(genome_gifts)]

genome_gifts_filt <- genome_gifts[present_MAGs,] %>%
  select_if(~!all(. == 0)) %>%  #remove all-zero modules
  select_if(~!all(. == 1)) #remove all-one modules

#Align tree with present MAGs
tree_filt <- keep.tip(genome_tree,present_MAGs)

#Filter count table to only contain present MAGs after gifts filtering
genome_counts_filt <- genome_counts[present_MAGs,]

#Calculate sequence fractions for each samples
sequence_fractions <- read_counts %>%
  pivot_longer(-genome, names_to = "sample", values_to = "value") %>%
  group_by(sample) %>%
  summarise(mags = sum(value)) %>%
  left_join(sample_metadata, by = join_by(sample == sample))  %>%
  select(sample,mags,metagenomic_bases,host_bases,bases_lost_fastp_percent) %>%
  mutate(mags_bases = mags*146) %>%
  mutate(lowqual_bases = ((metagenomic_bases+host_bases)/(1-bases_lost_fastp_percent))-(metagenomic_bases+host_bases)) %>%
  mutate(unmapped_bases = metagenomic_bases - mags_bases) %>%
  mutate(unmapped_bases = ifelse(unmapped_bases < 0, 0, unmapped_bases)) %>%
  select(sample,mags_bases,unmapped_bases,host_bases,lowqual_bases)

6.2 Alpha Diversity metrics

#ALPHA DIVERSITY
q0n <- hilldiv2::hilldiv(genome_counts,q=0) %>% c()
Neutral Hill numbers of q0
q1n <- hilldiv2::hilldiv(genome_counts,q=1) %>% c()
Neutral Hill numbers of q1
q1p <- hilldiv2::hilldiv(genome_counts,q=1,tree=genome_tree) %>% c()
Phylogenetic Hill numbers of q1
dist <- hilldiv2::traits2dist(genome_gifts_filt, method="gower")
q1f <- hilldiv2::hilldiv(genome_counts_filt,q=1,dist=dist) %>% c()
Functional Hill numbers of q1
# Merge all metrics
alpha_div <- cbind(sample=colnames(genome_counts),richness=q0n,neutral=round(q1n,3),phylo=round(q1p,3),func=round(q1f,3)) %>%
  as.data.frame()
columns <- c("richness","neutral","phylo","func", "mapped","total")

# Add amount of sequencing data to the table
alpha_div <- alpha_div %>%
  left_join(sequence_fractions, by = join_by(sample == sample)) %>% #add sequencing depth information
  mutate(mapped=round(mags_bases/1000000000,3)) %>% #modify depth to million reads
  mutate(total=round((mags_bases+unmapped_bases+host_bases+lowqual_bases)/1000000000,3)) %>%
  select(sample,richness,neutral,phylo,func,mapped,total) %>%
  mutate(across(-1, as.numeric))

squirrel_colors <- c("#999999", "#cc3333")

alpha_div %>%
  left_join(sample_metadata, by='sample') %>%
  select(sample, species, richness, neutral,phylo,func, mapped, total) %>%
  mutate(species = factor(species), # Convert to factor if necessary
        sample = factor(sample, levels = unique(sample)[order(species)])) %>%
  pivot_longer(-c(sample, species), names_to = "data", values_to = "value") %>%
  mutate(data = factor(data, levels = columns)) %>%
  ggplot(aes(x=value, y=sample, fill=species)) +
  geom_bar(stat='identity') +
  scale_fill_manual(values=squirrel_colors) +
  facet_wrap(~ data, scales="free_x", ncol=6) +
  #facet_grid(species ~ data, scales="free_x", space="fixed") +
  #force_panelsizes(ro#ws = 2, cols = 3, TRUE) +
  theme_classic() +
  theme(
    strip.background = element_blank(),
    panel.grid.minor.x = element_line(size=.1, color="grey"),
    panel.spacing = unit(0, "lines"),
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    axis.text.x = element_text(angle = 45, hjust = 1),
    axis.text.y = element_blank(),
    legend.position = "none"
  )

# #table
# kable(alpha_div)

6.3 Alpha Diversity comparisons

6.3.1 By species and sex

squirrel_colors <- c("#999999", "#cc3333")

sex_colors <- c("turquoise3", "indianred2")


neutral.sex <- alpha_div %>%
            select(sample,neutral) %>%
            pivot_longer(-sample, names_to = "data", values_to = "value") %>%
            mutate(data = factor(data, levels = columns))   %>%
            left_join(sample_metadata, by = join_by(sample == sample)) %>%
            ggboxplot(., x = "species", y = "value", color = "sex", fill="white", add="jitter") +
                  scale_color_manual(values=sex_colors) +
                  scale_fill_manual(values=paste0(squirrel_colors)) +
                  stat_compare_means() +
                  theme_classic() +
                  labs(y = "Neutral Hill numbers") +
                  theme(
                      legend.position = "none",
                      axis.title.x = element_blank())

phylo.sex <- alpha_div %>%
            select(sample,phylo) %>%
            pivot_longer(-sample, names_to = "data", values_to = "value") %>%
            mutate(data = factor(data, levels = columns))   %>%
            left_join(sample_metadata, by = join_by(sample == sample)) %>%
            ggboxplot(., x = "species", y = "value", color = "sex", fill="white", add="jitter") +
                  scale_color_manual(values=sex_colors) +
                  scale_fill_manual(values=paste0(squirrel_colors)) +
                  stat_compare_means() +
                  theme_classic() +
                  labs(y = "Phylogenetic Hill numbers") +
                  theme(
                      legend.position = "none",
                      axis.title.x = element_blank())

func.sex <- alpha_div %>%
            select(sample,func) %>%
            pivot_longer(-sample, names_to = "data", values_to = "value") %>%
            mutate(data = factor(data, levels = columns))   %>%
            left_join(sample_metadata, by = join_by(sample == sample)) %>%
            ggboxplot(., x = "species", y = "value", color = "sex", fill="white", add="jitter") +
                  scale_color_manual(values=sex_colors) +
                  scale_fill_manual(values=paste0(squirrel_colors)) +
                  stat_compare_means() +
                  theme_classic() +
                  labs(y = "Functional Hill numbers") +
                  theme(
                      legend.position = "none",
                      axis.title.x = element_blank())


sex.legend <- get_legend(neutral.sex)


ggarrange(neutral.sex, phylo.sex, func.sex, #+ rremove("x.text"), 
          legend.grob = sex.legend, legend="right", common.legend = TRUE,
          #labels = c("A", "B", "C"),
          ncol = 1, nrow = 3)

6.3.2 By species and urbanisation

sample_metadata$area_type <-factor(sample_metadata$area_type, levels = c("rural", "suburban", "urban"))
area_colors <- c("#76b183","#d57d2c","#6b7398")

#neutral alpha by species*area_type
neutral.urb <- alpha_div %>%
            select(sample,neutral) %>%
            pivot_longer(-sample, names_to = "data", values_to = "value") %>%
            mutate(data = factor(data, levels = columns))   %>%
            left_join(sample_metadata, by = join_by(sample == sample)) %>%
            ggboxplot(., x = "species", y = "value", color = "area_type", fill="white", add="jitter") +
            scale_color_manual(values=area_colors) +
            scale_fill_manual(values=paste0(area_colors)) +
            stat_compare_means() +
            theme_classic() +
            labs(y = "Neutral Hill numbers") +
            theme(
              legend.position = "none",
              axis.title.x = element_blank()) +
            guides(color=guide_legend(title="Urbanisation"), fill="none")

#phylogenetic alpha by species*area_type
phylo.urb <- alpha_div %>%
            select(sample,phylo) %>%
            pivot_longer(-sample, names_to = "data", values_to = "value") %>%
            mutate(data = factor(data, levels = columns))   %>%
            left_join(sample_metadata, by = join_by(sample == sample)) %>%
            ggboxplot(., x = "species", y = "value", color = "area_type", fill="white", add="jitter") +
                  scale_color_manual(values=area_colors) +
                  scale_fill_manual(values=paste0(area_colors)) +
                  stat_compare_means() +
                  theme_classic() +
                  labs(y = "Phylogenetic Hill numbers") +
                  theme(
                      legend.position = "none",
                      axis.title.x = element_blank())

#functional (distillr-based) alpha by species*area_type
func.urb <- alpha_div %>%
            select(sample,func) %>%
            pivot_longer(-sample, names_to = "data", values_to = "value") %>%
            mutate(data = factor(data, levels = columns))   %>%
            left_join(sample_metadata, by = join_by(sample == sample)) %>%
            ggboxplot(., x = "species", y = "value", color = "area_type", fill="white", add="jitter") +
                  scale_color_manual(values=area_colors) +
                  scale_fill_manual(values=paste0(area_colors)) +
                  stat_compare_means() +
                  theme_classic() +
                  labs(y = "Functional Hill numbers") +
                  theme(
                      legend.position = "none",
                      axis.title.x = element_blank())


urb.legend <- get_legend(neutral.urb)


ggarrange(neutral.urb, phylo.urb, func.urb, #+ rremove("x.text"), 
          legend.grob = urb.legend, legend="right", common.legend = TRUE,
          #labels = c("A", "B", "C"),
          ncol = 1, nrow = 3)

6.3.3 By species and season

sample_metadata$season <-factor(sample_metadata$season, levels = c("spring-summer", "autumn", "winter"))
season_colors <- c("#76b183","#e5bd5b","#6b7398")

#neutral alpha by species*season
neutral.seas <- alpha_div %>%
            select(sample,neutral) %>%
            pivot_longer(-sample, names_to = "data", values_to = "value") %>%
            mutate(data = factor(data, levels = columns))   %>%
            left_join(sample_metadata, by = join_by(sample == sample)) %>%
            ggboxplot(., x = "species", y = "value", color = "season", fill="white", add="jitter") +
            scale_color_manual(values=season_colors) +
            scale_fill_manual(values=paste0(season_colors)) +
            stat_compare_means() +
            theme_classic() +
            labs(y = "Neutral Hill numbers") +
            theme(
              legend.position = "none",
              axis.title.x = element_blank()) +
            guides(color=guide_legend(title="Season"), fill="none")

#phylogenetic alpha by species*season
phylo.seas <- alpha_div %>%
            select(sample,phylo) %>%
            pivot_longer(-sample, names_to = "data", values_to = "value") %>%
            mutate(data = factor(data, levels = columns))   %>%
            left_join(sample_metadata, by = join_by(sample == sample)) %>%
            ggboxplot(., x = "species", y = "value", color = "season", fill="white", add="jitter") +
                  scale_color_manual(values=season_colors) +
                  scale_fill_manual(values=paste0(season_colors)) +
                  stat_compare_means() +
                  theme_classic() +
                  labs(y = "Phylogenetic Hill numbers") +
                  theme(
                      legend.position = "none",
                      axis.title.x = element_blank())

#functional (distillr-based) alpha by species*season
func.seas <- alpha_div %>%
            select(sample,func) %>%
            pivot_longer(-sample, names_to = "data", values_to = "value") %>%
            mutate(data = factor(data, levels = columns))   %>%
            left_join(sample_metadata, by = join_by(sample == sample)) %>%
            ggboxplot(., x = "species", y = "value", color = "season", fill="white", add="jitter") +
                  scale_color_manual(values=season_colors) +
                  scale_fill_manual(values=paste0(season_colors)) +
                  stat_compare_means() +
                  theme_classic() +
                  labs(y = "Functional Hill numbers") +
                  theme(
                      legend.position = "none",
                      axis.title.x = element_blank())


seas.legend <- get_legend(neutral.seas)


ggarrange(neutral.seas, phylo.seas, func.seas, #+ rremove("x.text"), 
          legend.grob = seas.legend, legend="right", common.legend = TRUE,
          #labels = c("A", "B", "C"),
          ncol = 1, nrow = 3)

6.3.4 By species and development

#neutral alpha by species*season
neutral.dev <- alpha_div %>%
            select(sample,neutral) %>%
            pivot_longer(-sample, names_to = "data", values_to = "value") %>%
            mutate(data = factor(data, levels = columns))   %>%
            left_join(sample_metadata, by = join_by(sample == sample)) %>%
            ggboxplot(., x = "species", y = "value", color = "development", fill="white", add="jitter") +
            stat_compare_means() +
            theme_classic() +
            labs(y = "Neutral Hill numbers") +
            theme(
              legend.position = "none",
              axis.title.x = element_blank()) +
            guides(color=guide_legend(title="Development"), fill="none")

#phylogenetic alpha by species*season
phylo.dev <- alpha_div %>%
            select(sample,phylo) %>%
            pivot_longer(-sample, names_to = "data", values_to = "value") %>%
            mutate(data = factor(data, levels = columns))   %>%
            left_join(sample_metadata, by = join_by(sample == sample)) %>%
            ggboxplot(., x = "species", y = "value", color = "development", fill="white", add="jitter") +
                  stat_compare_means() +
                  theme_classic() +
                  labs(y = "Phylogenetic Hill numbers") +
                  theme(
                      legend.position = "none",
                      axis.title.x = element_blank())

#functional (distillr-based) alpha by species*season
func.dev <- alpha_div %>%
            select(sample,func) %>%
            pivot_longer(-sample, names_to = "data", values_to = "value") %>%
            mutate(data = factor(data, levels = columns))   %>%
            left_join(sample_metadata, by = join_by(sample == sample)) %>%
            ggboxplot(., x = "species", y = "value", color = "development", fill="white", add="jitter") +
                  stat_compare_means() +
                  theme_classic() +
                  labs(y = "Functional Hill numbers") +
                  theme(
                      legend.position = "none",
                      axis.title.x = element_blank())


dev.legend <- get_legend(neutral.seas)


ggarrange(neutral.dev, phylo.dev, func.dev, #+ rremove("x.text"), 
          legend.grob = seas.legend, legend="right", common.legend = TRUE,
          #labels = c("A", "B", "C"),
          ncol = 1, nrow = 3)

6.3.5 Alpha diversity and sequencing effort

#sequencing effort and diversity
ggplot(alpha_div, aes(x=mapped,y=neutral,label=sample)) +
  geom_smooth(method='lm', formula= y~x, color='#e08dde', fill='#e08dde') +
  geom_point(alpha=0.5, color="#6c9ebc") +
  geom_label_repel(max.overlaps = 100, cex=0.7) +
  labs(x = "GBs mapped to MAGs", y = "Neutral diversity (effective number of MAGs)") +
  theme_classic() +
  theme(legend.position="none")

6.4 Alpha diversity models

6.4.1 Data preparation for GLMMs

diversity.data <- alpha_div %>%
  left_join(sample_metadata, by = join_by(sample == sample)) %>%
  mutate(season=factor(season, levels = c("spring-summer", "autumn", "winter"))) %>%
  right_join(singlem, by = join_by(sample == sample)) %>%
  group_by(species) %>%
  mutate(index500_st = scale(index500, center=T, scale=T)[,1]) %>%
  ungroup() %>%
  filter(sample!="EHI02263") %>% #remove outlier
  filter(development=="Adult") #remove juveniles, nursing and pregnant females

#check whether a low domain-adjusted mapping rate (DAMR) is associated with low diversity estimates 
ggplot(diversity.data, aes(x=est_mapp, y=neutral)) +
  geom_point(size=3, alpha=0.5, color="#6c9ebc") +
  labs(x = "DAMR (mapping rate to MAG catalogue/singleM microbial fraction estimate)", y = "Neutral diversity (effective number of MAGs)") +
  theme_classic() +
  theme(legend.position="none")

diversity.data <- diversity.data %>%
  filter(mags_singlem > 0.8) #remove 5 samples with low DAMR
# check y distributions
plot(diversity.data$neutral)
hist(diversity.data$neutral, breaks=30)
d <- density(diversity.data$neutral)
plot(d)

plot(diversity.data$phylo)
hist(diversity.data$phylo, breaks=30)
d <- density(diversity.data$phylo)
plot(d)

plot(diversity.data$func)
hist(diversity.data$func, breaks=30)
d <- density(diversity.data$func)
plot(d)

6.4.2 GLMM - neutral alpha

neutral.model <-lmer(neutral ~ species + index500 + season + 
            + species:index500 + species:season 
            + (1|animal),
            data=diversity.data, na.action=na.omit,
            control=lmerControl(optimizer="bobyqa",
                                optCtrl=list(maxfun=2e5)))
boundary (singular) fit: see help('isSingular')
summary(neutral.model)
Linear mixed model fit by REML. t-tests use Satterthwaite's method ['lmerModLmerTest']
Formula: neutral ~ species + index500 + season + +species:index500 + species:season +      (1 | animal)
   Data: diversity.data
Control: lmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 2e+05))

REML criterion at convergence: 1390.2

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-3.3437 -0.6771 -0.0379  0.5914  2.7877 

Random effects:
 Groups   Name        Variance  Std.Dev. 
 animal   (Intercept) 4.566e-11 6.758e-06
 Residual             6.013e+02 2.452e+01
Number of obs: 155, groups:  animal, 97

Fixed effects:
                  Estimate Std. Error      df t value Pr(>|t|)    
(Intercept)         77.557      3.720 147.000  20.850  < 2e-16 ***
species1            36.192      3.720 147.000   9.730  < 2e-16 ***
index500           -25.246      8.411 147.000  -3.002  0.00316 ** 
season1              4.074      2.965 147.000   1.374  0.17157    
season2             -3.995      2.715 147.000  -1.471  0.14334    
species1:index500   -5.723      8.411 147.000  -0.680  0.49732    
species1:season1    -2.349      2.965 147.000  -0.792  0.42951    
species1:season2    -2.783      2.715 147.000  -1.025  0.30700    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Correlation of Fixed Effects:
            (Intr) specs1 ind500 seasn1 seasn2 s1:500 spc1:1
species1     0.069                                          
index500    -0.835 -0.158                                   
season1      0.017 -0.022  0.065                            
season2     -0.045  0.057 -0.042 -0.505                     
spcs1:nd500 -0.158 -0.835  0.316  0.082 -0.097              
specs1:ssn1 -0.022  0.017  0.082  0.150 -0.085  0.065       
specs1:ssn2  0.057 -0.045 -0.097 -0.085  0.080 -0.042 -0.505
optimizer (bobyqa) convergence code: 0 (OK)
boundary (singular) fit: see help('isSingular')
Anova(neutral.model, test.statistic='F', type=3) 
Analysis of Deviance Table (Type III Wald F tests with Kenward-Roger df)

Response: neutral
                        F Df  Df.res    Pr(>F)    
(Intercept)      428.7763  1  71.918 < 2.2e-16 ***
species           93.3722  1  71.918 1.246e-14 ***
index500           8.8875  1  83.021  0.003767 ** 
season             1.3256  2 122.519  0.269420    
species:index500   0.4567  1  83.021  0.501052    
species:season     1.6519  2 122.519  0.195935    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#check_model(neutral.model) #not giving the full panel in rmd
resid_panel(neutral.model, smoother = TRUE, qqbands = TRUE)
`geom_smooth()` using formula = 'y ~ x'
`geom_smooth()` using formula = 'y ~ x'

neutral.model.eff <- allEffects(neutral.model) 
plot(neutral.model.eff)

# plot(effect('species', neutral.model))
# plot(effect('index:500', neutral.model))

6.4.3 GLMM - phylogenetic alpha

phylo.model<-lmer(phylo ~ species + index500 + season
                    + species:index500 + species:season 
                    + (1|animal),
                    data=diversity.data, na.action=na.omit,
                    control=lmerControl(optimizer="bobyqa",
                                        optCtrl=list(maxfun=2e5)))

summary(phylo.model)
Linear mixed model fit by REML. t-tests use Satterthwaite's method ['lmerModLmerTest']
Formula: phylo ~ species + index500 + season + species:index500 + species:season +      (1 | animal)
   Data: diversity.data
Control: lmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 2e+05))

REML criterion at convergence: 491.3

Scaled residuals: 
     Min       1Q   Median       3Q      Max 
-2.60908 -0.64709  0.05091  0.61014  2.19447 

Random effects:
 Groups   Name        Variance Std.Dev.
 animal   (Intercept) 0.09958  0.3156  
 Residual             1.23320  1.1105  
Number of obs: 155, groups:  animal, 97

Fixed effects:
                   Estimate Std. Error        df t value Pr(>|t|)    
(Intercept)         5.23221    0.18080  72.98166  28.940  < 2e-16 ***
species1            0.87423    0.18080  72.98166   4.835 7.17e-06 ***
index500           -1.09429    0.40695  83.37873  -2.689  0.00865 ** 
season1             0.21802    0.13778 128.45516   1.582  0.11601    
season2            -0.11607    0.12548 113.53152  -0.925  0.35691    
species1:index500   0.38053    0.40695  83.37873   0.935  0.35245    
species1:season1   -0.28996    0.13778 128.45516  -2.105  0.03728 *  
species1:season2    0.06562    0.12548 113.53152   0.523  0.60204    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Correlation of Fixed Effects:
            (Intr) specs1 ind500 seasn1 seasn2 s1:500 spc1:1
species1     0.071                                          
index500    -0.837 -0.161                                   
season1      0.011 -0.030  0.069                            
season2     -0.040  0.060 -0.047 -0.508                     
spcs1:nd500 -0.161 -0.837  0.318  0.089 -0.103              
specs1:ssn1 -0.030  0.011  0.089  0.155 -0.096  0.069       
specs1:ssn2  0.060 -0.040 -0.103 -0.096  0.086 -0.047 -0.508
Anova(phylo.model, test.statistic='F', type=3)
Analysis of Deviance Table (Type III Wald F tests with Kenward-Roger df)

Response: phylo
                        F Df  Df.res    Pr(>F)    
(Intercept)      828.4542  1  75.041 < 2.2e-16 ***
species           23.1286  1  75.041 7.625e-06 ***
index500           7.1502  1  85.393   0.00898 ** 
season             1.2371  2 118.803   0.29394    
species:index500   0.8646  1  85.393   0.35507    
species:season     2.3696  2 118.803   0.09792 .  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
resid_panel(phylo.model, smoother = TRUE, qqbands = TRUE)
`geom_smooth()` using formula = 'y ~ x'
`geom_smooth()` using formula = 'y ~ x'

phylo.model.eff <- allEffects(phylo.model) 
plot(phylo.model.eff)

# 
# plot(effect('species', phylo.model))
# plot(effect('area_type', phylo.model))

6.4.4 GLMM - functional (distillr-based) alpha

func.model<-lmer(func ~ species + index500 + season 
                  + species:index500 + species:season 
                  + (1|animal),
                  data=diversity.data, na.action=na.omit,
                  control=lmerControl(optimizer="bobyqa",
                                      optCtrl=list(maxfun=2e5))) 
  

summary(func.model)
Linear mixed model fit by REML. t-tests use Satterthwaite's method ['lmerModLmerTest']
Formula: func ~ species + index500 + season + species:index500 + species:season +      (1 | animal)
   Data: diversity.data
Control: lmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 2e+05))

REML criterion at convergence: -299.7

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-3.8239 -0.2954  0.0767  0.5142  2.9871 

Random effects:
 Groups   Name        Variance  Std.Dev.
 animal   (Intercept) 0.0004508 0.02123 
 Residual             0.0056842 0.07539 
Number of obs: 155, groups:  animal, 97

Fixed effects:
                    Estimate Std. Error         df t value Pr(>|t|)    
(Intercept)         1.428370   0.012260  74.547352 116.502  < 2e-16 ***
species1            0.026898   0.012260  74.547352   2.194  0.03136 *  
index500           -0.054164   0.027599  84.921917  -1.963  0.05297 .  
season1             0.019183   0.009350 129.217452   2.052  0.04222 *  
season2            -0.016192   0.008516 114.714250  -1.901  0.05977 .  
species1:index500   0.029131   0.027599  84.921917   1.056  0.29418    
species1:season1   -0.025732   0.009350 129.217452  -2.752  0.00677 ** 
species1:season2    0.017052   0.008516 114.714250   2.002  0.04761 *  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Correlation of Fixed Effects:
            (Intr) specs1 ind500 seasn1 seasn2 s1:500 spc1:1
species1     0.071                                          
index500    -0.837 -0.161                                   
season1      0.011 -0.030  0.069                            
season2     -0.040  0.060 -0.047 -0.508                     
spcs1:nd500 -0.161 -0.837  0.318  0.089 -0.103              
specs1:ssn1 -0.030  0.011  0.089  0.155 -0.096  0.069       
specs1:ssn2  0.060 -0.040 -0.103 -0.096  0.086 -0.047 -0.508
Anova(func.model, test.statistic='F', type=3)
Analysis of Deviance Table (Type III Wald F tests with Kenward-Roger df)

Response: func
                          F Df  Df.res  Pr(>F)    
(Intercept)      13425.5643  1  74.994 < 2e-16 ***
species              4.7609  1  74.994 0.03225 *  
index500             3.8086  1  85.357 0.05427 .  
season               2.5527  2 118.866 0.08213 .  
species:index500     1.1017  1  85.357 0.29686    
species:season       3.9545  2 118.866 0.02174 *  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
resid_panel(func.model, smoother = TRUE, qqbands = TRUE)
`geom_smooth()` using formula = 'y ~ x'
`geom_smooth()` using formula = 'y ~ x'

# func.model %>% tbl_regression() %>%
#   add_global_p()
func.model.eff <- allEffects(func.model)
plot(func.model.eff)

6.5 Neutral beta diversity

#neutral beta div
beta_q1n <-hilldiv2::hillpair(genome_counts, q=1, metric="S")
#neutral beta diversity PERMANOVA

sample_metadata_adonis <- sample_metadata %>%
  filter(sample %in% labels(beta_q1n)) %>%
  arrange(sample) %>%
  #mutate(location=paste0(round(longitude,2),"_",round(latitude,2))) %>%
  select(sample,species,sex,development,area_type,macroarea,season) %>%
  select_if(~ length(unique(.)) > 1) %>% #remove columns with all-identical values
  column_to_rownames(var = "sample") %>%
  as.data.frame()

adonis2(formula=beta_q1n ~ ., data=sample_metadata_adonis[labels(beta_q1n),], permutations=999) %>%
  as.matrix() %>%
  print()
             Df   SumOfSqs         R2         F Pr(>F)
species       1 12.6575822 0.15490727 43.167294  0.001
sex           1  0.5155096 0.00630896  1.758089  0.019
development   3  1.3781851 0.01686664  1.566716  0.004
area_type     2  2.4213864 0.02963365  4.128936  0.001
macroarea    12 13.9088557 0.17022073  3.952885  0.001
season        2  1.5679578 0.01918914  2.673674  0.001
Residual    168 49.2612260 0.60287360        NA     NA
Total       189 81.7107028 1.00000000        NA     NA

6.5.1 Beta diversity by species and urbanization

beta_q1n_nmds <- beta_q1n %>%
  metaMDS(.,trymax = 500, k=2, verbosity=FALSE) %>%
  vegan::scores() %>%
  as_tibble(., rownames = "sample") %>%
  left_join(sample_metadata, by = join_by(sample == sample))
Run 0 stress 0.101802 
Run 1 stress 0.1015705 
... New best solution
... Procrustes: rmse 0.007913566  max resid 0.01751894 
Run 2 stress 0.1015918 
... Procrustes: rmse 0.003508095  max resid 0.01417502 
Run 3 stress 0.1015779 
... Procrustes: rmse 0.001291895  max resid 0.002968043 
... Similar to previous best
Run 4 stress 0.1015838 
... Procrustes: rmse 0.00362677  max resid 0.008496499 
... Similar to previous best
Run 5 stress 0.1015583 
... New best solution
... Procrustes: rmse 0.001587218  max resid 0.008320425 
... Similar to previous best
Run 6 stress 0.1017783 
... Procrustes: rmse 0.008331819  max resid 0.01764896 
Run 7 stress 0.1018517 
... Procrustes: rmse 0.00762316  max resid 0.01611824 
Run 8 stress 0.1015614 
... Procrustes: rmse 0.002499159  max resid 0.005716167 
... Similar to previous best
Run 9 stress 0.101594 
... Procrustes: rmse 0.002303628  max resid 0.01030644 
Run 10 stress 0.1017918 
... Procrustes: rmse 0.008582425  max resid 0.01728706 
Run 11 stress 0.1018414 
... Procrustes: rmse 0.008147174  max resid 0.01452672 
Run 12 stress 0.1016117 
... Procrustes: rmse 0.002186178  max resid 0.02092318 
Run 13 stress 0.1017876 
... Procrustes: rmse 0.00798022  max resid 0.01642904 
Run 14 stress 0.1015978 
... Procrustes: rmse 0.004311595  max resid 0.01474596 
Run 15 stress 0.1015849 
... Procrustes: rmse 0.005718846  max resid 0.01341926 
Run 16 stress 0.101589 
... Procrustes: rmse 0.003017177  max resid 0.01423571 
Run 17 stress 0.1015902 
... Procrustes: rmse 0.00273248  max resid 0.01416749 
Run 18 stress 0.1015956 
... Procrustes: rmse 0.004499008  max resid 0.01454033 
Run 19 stress 0.1015897 
... Procrustes: rmse 0.001173086  max resid 0.01317665 
Run 20 stress 0.1016014 
... Procrustes: rmse 0.002388214  max resid 0.0168364 
*** Best solution repeated 2 times
beta_q1n_nmds %>%
  group_by(species) %>%
  filter(sample !="EHI00420") %>% #remove outliers
  mutate(x_cen = mean(NMDS1, na.rm = TRUE)) %>%
  mutate(y_cen = mean(NMDS2, na.rm = TRUE)) %>%
  ungroup() %>%
  ggplot(., aes(x=NMDS1,y=NMDS2, color=area_type, shape=species, label=sample)) +
  scale_color_manual(values=area_colors) +
  geom_point(size=2.5) + #geom_text(hjust=2, vjust=0) +
  geom_segment(aes(x=x_cen, y=y_cen, xend=NMDS1, yend=NMDS2), alpha=0.2) +
  theme_classic() +
  theme(legend.position="right", legend.box="vertical") +
  guides(color=guide_legend(title="Species"), shape=guide_legend(title="Area type"))

### Beta diversity by species and season

beta_q1n_nmds %>%
  group_by(species) %>%
  filter(sample !="EHI00420") %>% #remove outliers
  mutate(x_cen = mean(NMDS1, na.rm = TRUE)) %>%
  mutate(y_cen = mean(NMDS2, na.rm = TRUE)) %>%
  ungroup() %>%
  ggplot(., aes(x=NMDS1,y=NMDS2, color=season, shape=species)) +
  scale_color_manual(values=season_colors) +
  geom_point(size=2.5) +
  geom_segment(aes(x=x_cen, y=y_cen, xend=NMDS1, yend=NMDS2), alpha=0.2) +
  theme_classic() +
  theme(legend.position="right", legend.box="vertical") +
  guides(color=guide_legend(title="Species"), shape=guide_legend(title="Season"))

6.5.2 Beta diversity by species and sex

beta_q1n_nmds %>%
  group_by(species) %>%
  filter(sample !="EHI00420") %>% #remove outliers
  mutate(x_cen = mean(NMDS1, na.rm = TRUE)) %>%
  mutate(y_cen = mean(NMDS2, na.rm = TRUE)) %>%
  ungroup() %>%
  ggplot(., aes(x=NMDS1,y=NMDS2, color=sex, shape=species)) +
  geom_point(size=2.5) +
  geom_segment(aes(x=x_cen, y=y_cen, xend=NMDS1, yend=NMDS2), alpha=0.2) +
  theme_classic() +
  theme(legend.position="right", legend.box="vertical") +
  guides(color=guide_legend(title="Species"), shape=guide_legend(title="Sex"))

6.5.3 Beta diversity by species and development

beta_q1n_nmds %>%
  group_by(species) %>%
  filter(sample !="EHI00420") %>% #remove outliers
  mutate(x_cen = mean(NMDS1, na.rm = TRUE)) %>%
  mutate(y_cen = mean(NMDS2, na.rm = TRUE)) %>%
  ungroup() %>%
  ggplot(., aes(x=NMDS1,y=NMDS2, color=development, shape=species)) +
  geom_point(size=2.5) +
  geom_segment(aes(x=x_cen, y=y_cen, xend=NMDS1, yend=NMDS2), alpha=0.2) +
  theme_classic() +
  theme(legend.position="right", legend.box="vertical") +
  guides(color=guide_legend(title="Species"), shape=guide_legend(title="Development"))